# Drop the leading length(LETTERS) entries of `num`.
# NOTE(review): presumably `num` was built with LETTERS prepended (as in the
# blocks below) so that factor codes span the whole alphabet -- confirm
# against the code above this point.
num <- num[-seq_along(LETTERS)]
# Side-by-side view: letter code, week, weekly case count, sorted letter codes.
cbind(num, out$week, out$count_cases, sort(num))
# `method` must be an argument of cor(), not print(); previously it was
# swallowed by print() and a Pearson correlation was reported instead.
print(cor(as.numeric(out$week), num, method = "spearman"))
# Ordering check for bail year 2013: is the week in which a judge appears
# rank-correlated with the first letter of fa_last_name?
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
out <- data[bail_year == 2013 & dup == 0, ]
out <- out[order(week_year), ]
# Factor code of the first letter of fa_last_name. LETTERS is prepended so
# the factor levels include the whole alphabet, then stripped off again.
# NOTE(review): codes equal alphabet positions only if every first letter is
# an uppercase A-Z character -- confirm.
num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
num <- num[-seq_along(LETTERS)]
# Side-by-side view: letter code, week, weekly case count, sorted letter codes.
cbind(num, out$week, out$count_cases, sort(num))
# `method` belongs to cor(); it was previously passed (misspelled) to print(),
# which ignored it, so a Pearson correlation was reported.
print(cor(as.numeric(out$week), num, method = "spearman"))
# Ordering check for bail year 2014: is the week in which a judge appears
# rank-correlated with the first letter of fa_last_name?
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
out <- data[bail_year == 2014 & dup == 0, ]
out <- out[order(week_year), ]
# Factor code of the first letter of fa_last_name. LETTERS is prepended so
# the factor levels include the whole alphabet, then stripped off again.
# NOTE(review): codes equal alphabet positions only if every first letter is
# an uppercase A-Z character -- confirm.
num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
num <- num[-seq_along(LETTERS)]
# Side-by-side view: letter code, week, weekly case count, sorted letter codes.
cbind(num, out$week, out$count_cases, sort(num))
# `method` belongs to cor(); it was previously passed to print(), which
# ignored it, so a Pearson correlation was reported.
print(cor(as.numeric(out$week), num, method = "spearman"))
# Ordering check for bail year 2015: is the week in which a judge appears
# rank-correlated with the first letter of fa_last_name?
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
out <- data[bail_year == 2015 & dup == 0, ]
out <- out[order(week_year), ]
# Factor code of the first letter of fa_last_name. LETTERS is prepended so
# the factor levels include the whole alphabet, then stripped off again.
# NOTE(review): codes equal alphabet positions only if every first letter is
# an uppercase A-Z character -- confirm.
num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
num <- num[-seq_along(LETTERS)]
# Side-by-side view: letter code, week, weekly case count, sorted letter codes.
cbind(num, out$week, out$count_cases, sort(num))
# `method` belongs to cor(); it was previously passed to print(), which
# ignored it, so a Pearson correlation was reported.
print(cor(as.numeric(out$week), num, method = "spearman"))
# Ordering check for bail year 2016: is the week in which a judge appears
# rank-correlated with the first letter of fa_last_name?
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
out <- data[bail_year == 2016 & dup == 0, ]
out <- out[order(week_year), ]
# Factor code of the first letter of fa_last_name. LETTERS is prepended so
# the factor levels include the whole alphabet, then stripped off again.
# NOTE(review): codes equal alphabet positions only if every first letter is
# an uppercase A-Z character -- confirm.
num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
num <- num[-seq_along(LETTERS)]
# Side-by-side view: letter code, week, weekly case count, sorted letter codes.
cbind(num, out$week, out$count_cases, sort(num))
# `method` belongs to cor(); it was previously passed to print(), which
# ignored it, so a Pearson correlation was reported.
print(cor(as.numeric(out$week), num, method = "spearman"))
# Show the unsorted 2016 rows (first row per week_year/judge_cat pair).
data[bail_year == 2016 & dup == 0, ]
# Reset the workspace and rebuild the cleaned weekend data set `dta.w`.
# NOTE(review): rm(list = ls()) wipes the caller's global environment; kept
# because the surrounding script relies on starting from a clean session.
rm(list = ls())
library(data.table)
# NOTE(review): presumably this .RData file supplies the data.table `dta.w`
# used below -- confirm.
load("~/Dropbox/McDonough extension project/Miami/Data/MiamiDade7658Weekends.RData")
# "%V" is the ISO 8601 week number (01-53).
# NOTE(review): it is later paired with bail_year; if bail_year is a calendar
# year, days around New Year may land in the wrong year-week -- confirm.
dta.w$week <- strftime(as.Date(dta.w$start_date_fa_seq), format = "%V")
# Drop rows with placeholder judge, defendant-name or arrest-date values.
dta.w <- dta.w[judge_cat != "UNNAMED", ]
dta.w <- dta.w[!(last_name == "UNKNOWN" | first_name == "UNKNOWN"), ]
dta.w <- dta.w[arrest_date != "00000000"]
# Keep the first row for each case_number.
dta.w$dup <- duplicated(dta.w$case_number)
dta.w <- dta.w[dup == 0, ]
# Keep the first row for each hybrid_id within a year-week.
dta.w$week_year <- paste(dta.w$bail_year, dta.w$week, sep = "-")
dta.w$dup <- duplicated(paste(dta.w$hybrid_id, dta.w$week_year, sep = "-"))
dta.w <- dta.w[dup == 0, ]
# Unify the two spellings of the same judge BEFORE counting; otherwise the
# per-judge counts below are split across "MIRIAM LEHR" and "MYRIAM LEHR".
dta.w$judge_cat[dta.w$judge_cat == "MIRIAM LEHR"] <- "MYRIAM LEHR"
# Rows per judge/week/year and rows per judge/year.
dta.w[, count_cases := .N, by = c("judge_cat", "week", "bail_year")]
dta.w[, count_cases_2 := .N, by = c("judge_cat", "bail_year")]
# 1.  do weekend judges in a given year have a similar number of
# weekend shifts in that year.
# That is, does a judge  assigned to weekends
# in a given year have about the same number of weekend shifts as
# other judges assigned to weekends that year?
# Number of weekend shifts is different from number of cases per weekend shift.
## 2009-2016: distribution of the number of weekend shifts per judge, by year.
## A "shift" here is a week_year in which the judge has at least one row.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums of the positive
  # cells give the number of distinct weeks per judge.
  shifts.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]) > 0)
  min.j <- min(shifts.j)
  std.j <- sd(shifts.j)
  ave.j <- mean(shifts.j)
  med.j <- median(shifts.j)
  max.j <- max(shifts.j)
  # Number of judges (table columns).
  num.j <- length(shifts.j)
  # Share of judges with at most two shifts.
  per.j <- mean(shifts.j <= 2)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j, per.j))
}
## 2. do weekend judges in a given year have a similar
## total number of weekend cases per year? Your #1 below
## refers to a similar number of weekend cases per shift.
## That’s helpful, but do we also need to know if total number
## of weekend cases per judge in that year are balanced?
## 2009-2016: distribution of the total number of weekend rows (cases) per
## judge, by year.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  # Disabled exploration retained from an earlier version:
  # data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
  # out <- data[bail_year == y & dup == 0, ]
  # out <- out[order(week), ]
  # num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
  # num <- num[-c(1:length(LETTERS))]
  # cbind(num, out$week, out$count_cases, sort(num))
  # print(cor(as.numeric(out$week), num))
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums are rows per judge.
  cases.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]))
  min.j <- min(cases.j)
  std.j <- sd(cases.j)
  ave.j <- mean(cases.j)
  med.j <- median(cases.j)
  max.j <- max(cases.j)
  # Number of judges (table columns).
  num.j <- length(cases.j)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j))
}
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2009], p = 0.05), ]
# t09 <- data.table(table(data$judge_cat[data$bail_year == 2009], data$week[data$bail_year == 2009]))
# t09 <- t09[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2010], p = 0.05), ]
# t10 <- data.table(table(data$judge_cat[data$bail_year == 2010], data$week[data$bail_year == 2010]))
# t10 <- t10[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2011], p = 0.05), ]
# t11 <- data.table(table(data$judge_cat[data$bail_year == 2011], data$week[data$bail_year == 2011]))
# t11 <- t11[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2012], p = 0.05), ]
# t12 <- data.table(table(data$judge_cat[data$bail_year == 2012], data$week[data$bail_year == 2012]))
# t12 <- t12[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2013], p = 0.05), ]
# t13 <- data.table(table(data$judge_cat[data$bail_year == 2013], data$week[data$bail_year == 2013]))
# t13 <- t13[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2014], p = 0.05), ]
# t14 <- data.table(table(data$judge_cat[data$bail_year == 2014], data$week[data$bail_year == 2014]))
# t14 <- t14[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2015], p = 0.05), ]
# t15 <- data.table(table(data$judge_cat[data$bail_year == 2015], data$week[data$bail_year == 2015]))
# t15 <- t15[N > 0]
#
# out1 <- merge(t09, t10, by = "V1")
# length(unique(out1$V1))
# cor(as.numeric(out1$V2.x), as.numeric(out1$V2.y))
#
# out2 <- merge(t10, t11, by = "V1")
# length(unique(out2$V1))
# cor(as.numeric(out2$V2.x), as.numeric(out2$V2.y))
#
# out3 <- merge(t11, t12, by = "V1")
# length(unique(out3$V1))
# cor(as.numeric(out3$V2.x), as.numeric(out3$V2.y))
#
# out4 <- merge(t13, t14, by = "V1")
# length(unique(out4$V1))
# cor(as.numeric(out4$V2.x), as.numeric(out4$V2.y))
#
# out5 <- merge(t14, t15, by = "V1")
# length(unique(out5$V1))
# cor(as.numeric(out5$V2.x), as.numeric(out5$V2.y))
#
#
#
# Export the filtered rows with a duplicate flag.
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
# FALSE spelled out: `F` is an ordinary variable and can be reassigned.
write.csv(data, file = "~/Dropbox/McDonough extension project/Miami/Data/order.csv", row.names = FALSE)
# Reset the workspace and rebuild the cleaned weekend data set `dta.w`.
# NOTE(review): rm(list = ls()) wipes the caller's global environment; kept
# because the surrounding script relies on starting from a clean session.
rm(list = ls())
library(data.table)
# NOTE(review): presumably this .RData file supplies the data.table `dta.w`
# used below -- confirm.
load("~/Dropbox/McDonough extension project/Miami/Data/MiamiDade7658Weekends.RData")
# "%V" is the ISO 8601 week number (01-53).
# NOTE(review): it is later paired with bail_year; if bail_year is a calendar
# year, days around New Year may land in the wrong year-week -- confirm.
dta.w$week <- strftime(as.Date(dta.w$start_date_fa_seq), format = "%V")
# Drop rows with placeholder judge, defendant-name or arrest-date values.
dta.w <- dta.w[judge_cat != "UNNAMED", ]
dta.w <- dta.w[!(last_name == "UNKNOWN" | first_name == "UNKNOWN"), ]
dta.w <- dta.w[arrest_date != "00000000"]
# Keep the first row for each case_number.
dta.w$dup <- duplicated(dta.w$case_number)
dta.w <- dta.w[dup == 0, ]
# Keep the first row for each hybrid_id within a year-week.
dta.w$week_year <- paste(dta.w$bail_year, dta.w$week, sep = "-")
dta.w$dup <- duplicated(paste(dta.w$hybrid_id, dta.w$week_year, sep = "-"))
dta.w <- dta.w[dup == 0, ]
# Unify the two spellings of the same judge BEFORE counting; otherwise the
# per-judge counts below are split across "MIRIAM LEHR" and "MYRIAM LEHR".
dta.w$judge_cat[dta.w$judge_cat == "MIRIAM LEHR"] <- "MYRIAM LEHR"
# Rows per judge/week/year and rows per judge/year.
dta.w[, count_cases := .N, by = c("judge_cat", "week", "bail_year")]
dta.w[, count_cases_2 := .N, by = c("judge_cat", "bail_year")]
# 1.  do weekend judges in a given year have a similar number of
# weekend shifts in that year.
# That is, does a judge  assigned to weekends
# in a given year have about the same number of weekend shifts as
# other judges assigned to weekends that year?
# Number of weekend shifts is different from number of cases per weekend shift.
## 2009-2016: distribution of the number of weekend shifts per judge, by year.
## A "shift" here is a week_year in which the judge has at least one row.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums of the positive
  # cells give the number of distinct weeks per judge.
  shifts.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]) > 0)
  min.j <- min(shifts.j)
  std.j <- sd(shifts.j)
  ave.j <- mean(shifts.j)
  med.j <- median(shifts.j)
  max.j <- max(shifts.j)
  # Number of judges (table columns).
  num.j <- length(shifts.j)
  # Share of judges with at most two shifts.
  per.j <- mean(shifts.j <= 2)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j, per.j))
}
## 2. do weekend judges in a given year have a similar
## total number of weekend cases per year? Your #1 below
## refers to a similar number of weekend cases per shift.
## That’s helpful, but do we also need to know if total number
## of weekend cases per judge in that year are balanced?
## 2009-2016: distribution of the total number of weekend rows (cases) per
## judge, by year.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  # Disabled exploration retained from an earlier version:
  # data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
  # out <- data[bail_year == y & dup == 0, ]
  # out <- out[order(week), ]
  # num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
  # num <- num[-c(1:length(LETTERS))]
  # cbind(num, out$week, out$count_cases, sort(num))
  # print(cor(as.numeric(out$week), num))
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums are rows per judge.
  cases.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]))
  min.j <- min(cases.j)
  std.j <- sd(cases.j)
  ave.j <- mean(cases.j)
  med.j <- median(cases.j)
  max.j <- max(cases.j)
  # Number of judges (table columns).
  num.j <- length(cases.j)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j))
}
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2009], p = 0.05), ]
# t09 <- data.table(table(data$judge_cat[data$bail_year == 2009], data$week[data$bail_year == 2009]))
# t09 <- t09[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2010], p = 0.05), ]
# t10 <- data.table(table(data$judge_cat[data$bail_year == 2010], data$week[data$bail_year == 2010]))
# t10 <- t10[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2011], p = 0.05), ]
# t11 <- data.table(table(data$judge_cat[data$bail_year == 2011], data$week[data$bail_year == 2011]))
# t11 <- t11[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2012], p = 0.05), ]
# t12 <- data.table(table(data$judge_cat[data$bail_year == 2012], data$week[data$bail_year == 2012]))
# t12 <- t12[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2013], p = 0.05), ]
# t13 <- data.table(table(data$judge_cat[data$bail_year == 2013], data$week[data$bail_year == 2013]))
# t13 <- t13[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2014], p = 0.05), ]
# t14 <- data.table(table(data$judge_cat[data$bail_year == 2014], data$week[data$bail_year == 2014]))
# t14 <- t14[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2015], p = 0.05), ]
# t15 <- data.table(table(data$judge_cat[data$bail_year == 2015], data$week[data$bail_year == 2015]))
# t15 <- t15[N > 0]
#
# out1 <- merge(t09, t10, by = "V1")
# length(unique(out1$V1))
# cor(as.numeric(out1$V2.x), as.numeric(out1$V2.y))
#
# out2 <- merge(t10, t11, by = "V1")
# length(unique(out2$V1))
# cor(as.numeric(out2$V2.x), as.numeric(out2$V2.y))
#
# out3 <- merge(t11, t12, by = "V1")
# length(unique(out3$V1))
# cor(as.numeric(out3$V2.x), as.numeric(out3$V2.y))
#
# out4 <- merge(t13, t14, by = "V1")
# length(unique(out4$V1))
# cor(as.numeric(out4$V2.x), as.numeric(out4$V2.y))
#
# out5 <- merge(t14, t15, by = "V1")
# length(unique(out5$V1))
# cor(as.numeric(out5$V2.x), as.numeric(out5$V2.y))
#
#
#
# Build the 2016 ordering sample and an all-years table sorted by week_year.
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
out <- data[bail_year == 2016 & dup == 0, ]
out <- out[order(week_year), ]
# Factor code of the first letter of fa_last_name; LETTERS is prepended so
# the levels include the whole alphabet, then stripped off again.
num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
num <- num[-seq_along(LETTERS)]
# `out2` was never created before being ordered ("object 'out2' not found");
# define it as the first row per (week_year, judge_cat) pair across all years.
out2 <- data[dup == 0, ]
out2 <- out2[order(week_year), ]
# Reset the workspace and rebuild the cleaned weekend data set `dta.w`.
# NOTE(review): rm(list = ls()) wipes the caller's global environment; kept
# because the surrounding script relies on starting from a clean session.
rm(list = ls())
library(data.table)
# NOTE(review): presumably this .RData file supplies the data.table `dta.w`
# used below -- confirm.
load("~/Dropbox/McDonough extension project/Miami/Data/MiamiDade7658Weekends.RData")
# "%V" is the ISO 8601 week number (01-53).
# NOTE(review): it is later paired with bail_year; if bail_year is a calendar
# year, days around New Year may land in the wrong year-week -- confirm.
dta.w$week <- strftime(as.Date(dta.w$start_date_fa_seq), format = "%V")
# Drop rows with placeholder judge, defendant-name or arrest-date values.
dta.w <- dta.w[judge_cat != "UNNAMED", ]
dta.w <- dta.w[!(last_name == "UNKNOWN" | first_name == "UNKNOWN"), ]
dta.w <- dta.w[arrest_date != "00000000"]
# Keep the first row for each case_number.
dta.w$dup <- duplicated(dta.w$case_number)
dta.w <- dta.w[dup == 0, ]
# Keep the first row for each hybrid_id within a year-week.
dta.w$week_year <- paste(dta.w$bail_year, dta.w$week, sep = "-")
dta.w$dup <- duplicated(paste(dta.w$hybrid_id, dta.w$week_year, sep = "-"))
dta.w <- dta.w[dup == 0, ]
# Unify the two spellings of the same judge BEFORE counting; otherwise the
# per-judge counts below are split across "MIRIAM LEHR" and "MYRIAM LEHR".
dta.w$judge_cat[dta.w$judge_cat == "MIRIAM LEHR"] <- "MYRIAM LEHR"
# Rows per judge/week/year and rows per judge/year.
dta.w[, count_cases := .N, by = c("judge_cat", "week", "bail_year")]
dta.w[, count_cases_2 := .N, by = c("judge_cat", "bail_year")]
# 1.  do weekend judges in a given year have a similar number of
# weekend shifts in that year.
# That is, does a judge  assigned to weekends
# in a given year have about the same number of weekend shifts as
# other judges assigned to weekends that year?
# Number of weekend shifts is different from number of cases per weekend shift.
## 2009-2016: distribution of the number of weekend shifts per judge, by year.
## A "shift" here is a week_year in which the judge has at least one row.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums of the positive
  # cells give the number of distinct weeks per judge.
  shifts.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]) > 0)
  min.j <- min(shifts.j)
  std.j <- sd(shifts.j)
  ave.j <- mean(shifts.j)
  med.j <- median(shifts.j)
  max.j <- max(shifts.j)
  # Number of judges (table columns).
  num.j <- length(shifts.j)
  # Share of judges with at most two shifts.
  per.j <- mean(shifts.j <= 2)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j, per.j))
}
## 2. do weekend judges in a given year have a similar
## total number of weekend cases per year? Your #1 below
## refers to a similar number of weekend cases per shift.
## That’s helpful, but do we also need to know if total number
## of weekend cases per judge in that year are balanced?
## 2009-2016: distribution of the total number of weekend rows (cases) per
## judge, by year.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  # Disabled exploration retained from an earlier version:
  # data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
  # out <- data[bail_year == y & dup == 0, ]
  # out <- out[order(week), ]
  # num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
  # num <- num[-c(1:length(LETTERS))]
  # cbind(num, out$week, out$count_cases, sort(num))
  # print(cor(as.numeric(out$week), num))
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums are rows per judge.
  cases.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]))
  min.j <- min(cases.j)
  std.j <- sd(cases.j)
  ave.j <- mean(cases.j)
  med.j <- median(cases.j)
  max.j <- max(cases.j)
  # Number of judges (table columns).
  num.j <- length(cases.j)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j))
}
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2009], p = 0.05), ]
# t09 <- data.table(table(data$judge_cat[data$bail_year == 2009], data$week[data$bail_year == 2009]))
# t09 <- t09[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2010], p = 0.05), ]
# t10 <- data.table(table(data$judge_cat[data$bail_year == 2010], data$week[data$bail_year == 2010]))
# t10 <- t10[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2011], p = 0.05), ]
# t11 <- data.table(table(data$judge_cat[data$bail_year == 2011], data$week[data$bail_year == 2011]))
# t11 <- t11[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2012], p = 0.05), ]
# t12 <- data.table(table(data$judge_cat[data$bail_year == 2012], data$week[data$bail_year == 2012]))
# t12 <- t12[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2013], p = 0.05), ]
# t13 <- data.table(table(data$judge_cat[data$bail_year == 2013], data$week[data$bail_year == 2013]))
# t13 <- t13[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2014], p = 0.05), ]
# t14 <- data.table(table(data$judge_cat[data$bail_year == 2014], data$week[data$bail_year == 2014]))
# t14 <- t14[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2015], p = 0.05), ]
# t15 <- data.table(table(data$judge_cat[data$bail_year == 2015], data$week[data$bail_year == 2015]))
# t15 <- t15[N > 0]
#
# out1 <- merge(t09, t10, by = "V1")
# length(unique(out1$V1))
# cor(as.numeric(out1$V2.x), as.numeric(out1$V2.y))
#
# out2 <- merge(t10, t11, by = "V1")
# length(unique(out2$V1))
# cor(as.numeric(out2$V2.x), as.numeric(out2$V2.y))
#
# out3 <- merge(t11, t12, by = "V1")
# length(unique(out3$V1))
# cor(as.numeric(out3$V2.x), as.numeric(out3$V2.y))
#
# out4 <- merge(t13, t14, by = "V1")
# length(unique(out4$V1))
# cor(as.numeric(out4$V2.x), as.numeric(out4$V2.y))
#
# out5 <- merge(t14, t15, by = "V1")
# length(unique(out5$V1))
# cor(as.numeric(out5$V2.x), as.numeric(out5$V2.y))
#
#
#
# Build the all-years ordering table `out2` and inspect it.
# The earlier attempts in this span failed (an empty argument inside c(...),
# selecting a `num` column before it existed, and assigning the 2016-only
# letter codes to the all-years table); only the working sequence is kept.
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
# 2016-only sample, sorted by week (not used further in this span).
out <- data[bail_year == 2016 & dup == 0, ]
out <- out[order(week_year), ]
# First row per (week_year, judge_cat) pair across all years, sorted by week.
out2 <- data[dup == 0, ]
out2 <- out2[order(week_year), ]
# Factor code of the first letter of fa_last_name, computed on out2 itself so
# its length matches. LETTERS is prepended so the levels include the whole
# alphabet, then stripped off again.
num <- as.numeric(as.factor(c(LETTERS, substr(out2$fa_last_name, 1, 1))))
out2$num <- num[-seq_along(LETTERS)]
out2
out2 <- out2[, c("week_year", "judge_cat", "bail_year", "count_cases", "num"), with = FALSE]
out2
# Reset the workspace and rebuild the cleaned weekend data set `dta.w`.
# NOTE(review): rm(list = ls()) wipes the caller's global environment; kept
# because the surrounding script relies on starting from a clean session.
rm(list = ls())
library(data.table)
# NOTE(review): presumably this .RData file supplies the data.table `dta.w`
# used below -- confirm.
load("~/Dropbox/McDonough extension project/Miami/Data/MiamiDade7658Weekends.RData")
# "%V" is the ISO 8601 week number (01-53).
# NOTE(review): it is later paired with bail_year; if bail_year is a calendar
# year, days around New Year may land in the wrong year-week -- confirm.
dta.w$week <- strftime(as.Date(dta.w$start_date_fa_seq), format = "%V")
# Drop rows with placeholder judge, defendant-name or arrest-date values.
dta.w <- dta.w[judge_cat != "UNNAMED", ]
dta.w <- dta.w[!(last_name == "UNKNOWN" | first_name == "UNKNOWN"), ]
dta.w <- dta.w[arrest_date != "00000000"]
# Keep the first row for each case_number.
dta.w$dup <- duplicated(dta.w$case_number)
dta.w <- dta.w[dup == 0, ]
# Keep the first row for each hybrid_id within a year-week.
dta.w$week_year <- paste(dta.w$bail_year, dta.w$week, sep = "-")
dta.w$dup <- duplicated(paste(dta.w$hybrid_id, dta.w$week_year, sep = "-"))
dta.w <- dta.w[dup == 0, ]
# Unify the two spellings of the same judge BEFORE counting; otherwise the
# per-judge counts below are split across "MIRIAM LEHR" and "MYRIAM LEHR".
dta.w$judge_cat[dta.w$judge_cat == "MIRIAM LEHR"] <- "MYRIAM LEHR"
# Rows per judge/week/year and rows per judge/year.
dta.w[, count_cases := .N, by = c("judge_cat", "week", "bail_year")]
dta.w[, count_cases_2 := .N, by = c("judge_cat", "bail_year")]
# 1.  do weekend judges in a given year have a similar number of
# weekend shifts in that year.
# That is, does a judge  assigned to weekends
# in a given year have about the same number of weekend shifts as
# other judges assigned to weekends that year?
# Number of weekend shifts is different from number of cases per weekend shift.
## 2009-2016: distribution of the number of weekend shifts per judge, by year.
## A "shift" here is a week_year in which the judge has at least one row.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums of the positive
  # cells give the number of distinct weeks per judge.
  shifts.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]) > 0)
  min.j <- min(shifts.j)
  std.j <- sd(shifts.j)
  ave.j <- mean(shifts.j)
  med.j <- median(shifts.j)
  max.j <- max(shifts.j)
  # Number of judges (table columns).
  num.j <- length(shifts.j)
  # Share of judges with at most two shifts.
  per.j <- mean(shifts.j <= 2)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j, per.j))
}
## 2. do weekend judges in a given year have a similar
## total number of weekend cases per year? Your #1 below
## refers to a similar number of weekend cases per shift.
## That’s helpful, but do we also need to know if total number
## of weekend cases per judge in that year are balanced?
## 2009-2016: distribution of the total number of weekend rows (cases) per
## judge, by year.
for (y in 2009:2016) {
  # Keep rows whose count_cases is at or above the 5th percentile of
  # count_cases among rows of bail year y.
  data <- dta.w[
    count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == y], probs = 0.05),
  ]
  # Disabled exploration retained from an earlier version:
  # data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
  # out <- data[bail_year == y & dup == 0, ]
  # out <- out[order(week), ]
  # num <- as.numeric(as.factor(c(LETTERS, substr(out$fa_last_name, 1, 1))))
  # num <- num[-c(1:length(LETTERS))]
  # cbind(num, out$week, out$count_cases, sort(num))
  # print(cor(as.numeric(out$week), num))
  in.year <- data$bail_year == y
  # week_year x judge_cat table, built once; column sums are rows per judge.
  cases.j <- colSums(table(data$week_year[in.year], data$judge_cat[in.year]))
  min.j <- min(cases.j)
  std.j <- sd(cases.j)
  ave.j <- mean(cases.j)
  med.j <- median(cases.j)
  max.j <- max(cases.j)
  # Number of judges (table columns).
  num.j <- length(cases.j)
  print(y)
  print(c(ave.j, std.j, med.j, min.j, max.j, num.j))
}
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2009], p = 0.05), ]
# t09 <- data.table(table(data$judge_cat[data$bail_year == 2009], data$week[data$bail_year == 2009]))
# t09 <- t09[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2010], p = 0.05), ]
# t10 <- data.table(table(data$judge_cat[data$bail_year == 2010], data$week[data$bail_year == 2010]))
# t10 <- t10[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2011], p = 0.05), ]
# t11 <- data.table(table(data$judge_cat[data$bail_year == 2011], data$week[data$bail_year == 2011]))
# t11 <- t11[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2012], p = 0.05), ]
# t12 <- data.table(table(data$judge_cat[data$bail_year == 2012], data$week[data$bail_year == 2012]))
# t12 <- t12[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2013], p = 0.05), ]
# t13 <- data.table(table(data$judge_cat[data$bail_year == 2013], data$week[data$bail_year == 2013]))
# t13 <- t13[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2014], p = 0.05), ]
# t14 <- data.table(table(data$judge_cat[data$bail_year == 2014], data$week[data$bail_year == 2014]))
# t14 <- t14[N > 0]
#
# data <- dta.w[count_cases >= quantile(dta.w$count_cases[dta.w$bail_year == 2015], p = 0.05), ]
# t15 <- data.table(table(data$judge_cat[data$bail_year == 2015], data$week[data$bail_year == 2015]))
# t15 <- t15[N > 0]
#
# out1 <- merge(t09, t10, by = "V1")
# length(unique(out1$V1))
# cor(as.numeric(out1$V2.x), as.numeric(out1$V2.y))
#
# out2 <- merge(t10, t11, by = "V1")
# length(unique(out2$V1))
# cor(as.numeric(out2$V2.x), as.numeric(out2$V2.y))
#
# out3 <- merge(t11, t12, by = "V1")
# length(unique(out3$V1))
# cor(as.numeric(out3$V2.x), as.numeric(out3$V2.y))
#
# out4 <- merge(t13, t14, by = "V1")
# length(unique(out4$V1))
# cor(as.numeric(out4$V2.x), as.numeric(out4$V2.y))
#
# out5 <- merge(t14, t15, by = "V1")
# length(unique(out5$V1))
# cor(as.numeric(out5$V2.x), as.numeric(out5$V2.y))
#
#
#
# Build the all-years ordering table and write it to order.csv.
# Keep rows whose count_cases is at or above the overall 5th percentile.
data <- dta.w[count_cases >= quantile(dta.w$count_cases, probs = 0.05), ]
# TRUE for every row after the first within each (week_year, judge_cat) pair.
data$dup <- duplicated(paste(data$week_year, data$judge_cat, sep = "-"))
# 2016-only sample, sorted by week. The letter codes that used to be derived
# from it were overwritten below before any use, so that step was dropped.
out <- data[bail_year == 2016 & dup == 0, ]
out <- out[order(week_year), ]
# First row per (week_year, judge_cat) pair across all years, sorted by week.
out2 <- data[dup == 0, ]
out2 <- out2[order(week_year), ]
# Factor code of the first letter of fa_last_name. LETTERS is prepended so
# the levels include the whole alphabet, then stripped off again.
# NOTE(review): codes equal alphabet positions only if every first letter is
# an uppercase A-Z character -- confirm.
num <- as.numeric(as.factor(c(LETTERS, substr(out2$fa_last_name, 1, 1))))
out2$num <- num[-seq_along(LETTERS)]
# FALSE spelled out: `F` is an ordinary variable and can be reassigned.
out2 <- out2[, c("week_year", "judge_cat", "bail_year", "count_cases", "num"), with = FALSE]
write.csv(out2, file = "~/Dropbox/McDonough extension project/Miami/Data/order.csv", row.names = FALSE)
